/*****************************************************************************
Description:	This program calculates pscores for lottery risk at the student x program
*				level using marginal priorities, program cutoffs and IK bandwidths from
*				15_create_pscores_mdrd2, then:
*				(i)		defining big thetas
*				(ii)	calculating MIDs
*				This version uses simulated lottery numbers for those with missing lottery
*				numbers, as well as simulated offers.
*	----------------------------------------------------------------------------
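*	inputs: 		${cleandata}program_pscore_{bw}{mod}_ms_{yyyy}_before_theta.dta
*						> output from 15_create_pscores_mdrd2 containing marginal
*						> priorities, cutoffs, and bandwidths
*						> {mod} and {bw} are this script's two arguments
*						> (modification_str and bw, in that order)
*	----------------------------------------------------------------------------
*	outputs: 		${cleandata}program_pscore_nobw_ms_{yyyy}.dta
*						> note: the output name does not include {mod}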
*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/

*	session settings: no -more- prompts, tracing disabled (depth capped at 1),
*	and -pause- enabled so that any pause command actually stops execution
	set more off
	pause on
	set tracedepth 1
	set trace off

*	positional arguments passed to this do-file; both are used to build the
*	name of the input file that is loaded for each year
	args modification_str bw

*	years to process, one loop iteration each
	local years "2016 2017 2018"

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

/*****************************************************************************\
 We need the following primitives:

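	i. 		marginal priority 	(from 15_create_pscores_mdrd2)
	ii. 	cutoff 				(from 15_create_pscores_mdrd2)
	iii.	bandwidth			(from 15_create_pscores_mdrd2)
	iv.		l's					(indicators l_a / l_n / l_c: always, never, conditionally seated)
	v.		MID					(largest lottery cutoff among more preferred programs where
								 the applicant is conditionally seated; 1 if always seated
								 at a more preferred program, 0 if never seated at any)
	vi. 	pscores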

\******************************************************************************/

// Loop over years
foreach year of local years {
	// Publish the current year as a global as well. Nothing inside this loop
	// reads that global; presumably other code does -- TODO confirm.
	global year "`year'"

	// Load file that already has primitives i.-iii. set up, so both files use the same info.
	// The data are at the student x program level and are processed below in
	// (stu, choice) order.
	use "${cleandata}program_pscore_`bw'`modification_str'_ms_`year'_before_theta.dta", clear

* ----------------------------------------------------------------------------
* iv. l's
* ----------------------------------------------------------------------------

	// For lottery risk, we ignore the variation around the cutoff for screened schools
	// so if an applicant clears the cutoff, they are l=a and if they don't
	// they are l=n, hence never l=c
	//
	// NOTE(review): Stata treats a missing value as larger than any number, so a
	// missing marginal_priority satisfies the first clause of l_a and a missing
	// hs_rank_centered satisfies the "> 0" clauses of l_n. The partition check
	// below cannot detect that. Confirm these variables are non-missing wherever
	// the clauses use them.

	*	Always seated
		// An applicant clears marginal priority, or applicant clears rank cutoff at
		// a non lottery program (or at a lottery program that also screens)
		gen l_a = 	(marginal_priority > global_priority) ///
						| (lottery_flag == 0 & marginal == 1 & hs_rank_centered <= 0 ) ///
						| (lottery_flag == 1 & marginal == 1 & (screened_uses_lottery == 1 ) & (hs_rank_centered < 0)  )

	*	Never seated
		// An applicant fails to clear marginal priority or is above the rank cutoff
		// at a screened program
		gen l_n =  (marginal_priority < global_priority ) ///
						| (lottery_flag == 0 & marginal == 1 & hs_rank_centered > 0  )   ///
						| (lottery_flag == 1 & marginal == 1 & (screened_uses_lottery == 1 ) & (hs_rank_centered > 0)  )

	*	Conditionally seated
		// Only considers lottery schools: the applicant is at the marginal priority
		// and, if the program also screens, sits exactly at the rank cutoff
		gen l_c = 	(lottery_flag == 1 & marginal_priority == global_priority & (screened_uses_lottery == 0 )  ) ///
						| (lottery_flag == 1 & marginal_priority == global_priority & (screened_uses_lottery == 1 ) & (hs_rank_centered == 0)  )

	// Check we partition the set of applicants: exactly one of the three
	// indicators must be 1 on every row. The variables are listed explicitly
	// (rather than with a wildcard) so that an unrelated l_<char> variable in the
	// input can never leak into the check.
	egen check = rowtotal(l_a l_n l_c)
	su check
	assert `r(min)' == 1 & `r(max)' == 1
	drop check

* ----------------------------------------------------------------------------
* v. MID
* ----------------------------------------------------------------------------

	// Each flag below describes the applicant's status at the programs ranked
	// ABOVE the current row. They are built recursively: under -by-, replace
	// works through observations in order, so [_n-1] already holds the updated
	// value for the previous choice.

	* 	always get more preferred
		// applicant is ever in l=a at any schools above school s
		sort stu choice
		gen ever_seated_more_preferred = 0
		la var ever_seated_more_preferred " 1 if ever cleared marginal priority at a more preferred school"
		by stu: replace ever_seated_more_preferred =  max(ever_seated_more_preferred[_n-1 ], l_a[ _n-1 ] ) if _n > 1  //maximum so that if its ever 1 the following chain will be 1.

	* 	never get more preferred
		// applicant is always in l=n at any higher ranked school
		sort stu choice
		gen never_get_more_preferred = 1  //by convention mid is 0 at first choice, because you can never get a better choice
		la var never_get_more_preferred "never clears marginal priority at more preferred schools"
		by stu: replace never_get_more_preferred =  min(never_get_more_preferred[_n-1 ], l_n[ _n-1 ] ) if _n > 1

	* 	conditionally get more preferred
		// is applicant ever marginal at a more preferred school?
		sort stu choice
		gen ever_marginal_more_preferred = 0
		by stu: replace ever_marginal_more_preferred = max(ever_marginal_more_preferred[_n-1 ], l_c[ _n-1 ] ) if _n > 1

		// applicant always either theta_c or theta_n at more preferred schools (never theta_a)?
		gen either_theta_cn = max(l_c, l_n)
		gen always_theta_cn_more_preferred = 1
		by stu: replace always_theta_cn_more_preferred =  min(always_theta_cn_more_preferred[_n-1 ], either_theta_cn[ _n-1 ] ) if _n > 1

		// applicant never theta_a but with at least one marginal school? (non-degenerate better set risk)
		gen sometimes_get_more_preferred = always_theta_cn_more_preferred == 1 & ever_marginal_more_preferred == 1
		la var sometimes_get_more_preferred "not guaranteed a spot at a higher rank school but at least marginal in a more preferred school"

	// Check that these definitions partition the set of applicants
	egen check = rowtotal(sometimes_get_more_preferred  ever_seated_more_preferred never_get_more_preferred)
	su check
	assert `r(max)' == 1 & `r(min)'  == 1
	drop check

	***	MID computation
		// MID boils down to risk generated by lottery schools in the better set

		// Generate variable as missing
		gen double mid = .

	* never seated at any more preferred program
		// Set to 0 if applicant is always in theta_n at more preferred lottery schools
		replace mid = 0 if never_get_more_preferred == 1

	* always seated at some more preferred program
		// Set to 1 if applicant are ever theta_a at a more preferred lottery school
		// (explicitly restricting to lottery schools doesn't change p-score calculation
		// since risk will be degenerate anyways)
		replace mid = 1 if ever_seated_more_preferred == 1

	* conditionally seated at some more preferred program
		// Get non-degenerate better set risk
		sort  stu choice
		// Only consider the lagged cutoff if applicant is theta c at that lottery school.
		// Stored as double: mid is double, and a float intermediate would round the
		// cutoff before it is compared with lottery_cutoff in the p-score formula.
		by  stu: gen double lagged_lottery_cutoff = lottery_cutoff[_n - 1 ] * l_c[_n-1] * lottery_flag[_n-1]

		// Running maximum over the more preferred programs. max() skips missing
		// arguments, so a missing lagged cutoff simply carries mid[_n-1] forward.
		by  stu: replace mid = max(mid[_n - 1], lagged_lottery_cutoff )  if sometimes_get_more_preferred == 1

	// First choice has no better set, so mid is zero there. Then fill in any mid
	// still missing (non lottery schools) from the previous choice; we need this
	// to calculate the pscore.
		sort  stu choice
		by stu: replace mid = 0 if _n ==1
		by stu: replace mid = mid[_n-1] if _n > 1 & mid == .

* ----------------------------------------------------------------------------
* vi. p-scores
* ----------------------------------------------------------------------------

	sort stu choice

	// Local score at screened schools: 1 if the applicant clears the cutoff and
	// is not already seated at a more preferred program, 0 otherwise. Left
	// missing for lottery programs.
	gen double pscore_rank = 0 if !lottery_flag & ( l_n == 1 | ever_seated_more_preferred == 1 )
	replace pscore_rank = 1 if !lottery_flag & ( l_a == 1 &  ever_seated_more_preferred == 0 )

	sort stu choice

	*** Code lottery risk pscore

		gen double pscore = .
		gen double one_minus_mid = 1 - mid
		la var one_minus_mid "this is lottery number truncation"

	* 	Degenerate case: never seated here, or already seated at a more preferred program
		replace pscore = 0 if l_n == 1 | ever_seated_more_preferred == 1

	*	Always seated here: offered unless the lottery number already qualified
	*	the applicant at a more preferred program
		replace pscore =  one_minus_mid  ///
			if l_a == 1 & ever_seated_more_preferred == 0

	*	Lottery school with risk at s
		// When one_minus_mid is 0 the division is missing, max(0, .) is 0, and the
		// score is therefore 0.
		replace pscore =  one_minus_mid *  max(0, (lottery_cutoff - mid)/one_minus_mid) ///
			if l_c == 1 & ever_seated_more_preferred == 0 & lottery_flag == 1

	* This is henceforth the pscore_formula
	ren pscore pscore_form

	// Compute frequency score: share of offers within each
	// program x (l_a, l_c, l_n) x mid cell
	bys programcode l_a l_c l_n mid: egen double pscore_freq = mean(offer)

	// Compute frequency score for applicants who are in l_n, where mid doesn't matter.
	bys programcode l_n: egen double frequency_offer_intermediate = mean(offer)

	replace pscore_freq = frequency_offer_intermediate if l_n == 1
	drop frequency_offer_intermediate

	// Save
	// NOTE(review): the output name does not include modification_str, so runs
	// with different modification strings overwrite the same file. Left unchanged
	// because downstream readers are not visible here -- confirm intended.
	save "${cleandata}program_pscore_nobw_ms_`year'.dta", replace
}
